rm(list=ls())

library(data.table)
library(xts)
library(argo)
library(forecast)
library(xlsx)
library(Metrics)
library(xtable)
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))


FIPS_Loc = read.csv("locations.csv")
State_FIPS_Loc = FIPS_Loc[1:52,]

# Load in ALL Teams
FIPS_Loc = read.csv("locations.csv")
State_FIPS_Loc = FIPS_Loc[1:52,] #only include 51 states, no islands. National is called "US"
paste_target = " day ahead inc hosp"
dates_tested<-seq(as.Date("2021-09-01"),as.Date("2022-02-01"),by=1)
# Assign lists and matrix for storing
ALL_Teams = list() # creat a list to store all the teams
for (dir in list.dirs()[-1]) { #for each team, it has four list of 1,2,3,4 weeks ahead inc death forecast. Each one is a matrix of num_dates rows and num_states and national (52) columns
  ALL_Teams[[dir]] = list()
  for(i in 1:14){
    ALL_Teams[[dir]][[paste0(i,paste_target)]] = xts(matrix(NA, length(dates_tested), dim(State_FIPS_Loc)[1]), order.by=dates_tested )
    colnames(ALL_Teams[[dir]][[paste0(i,paste_target)]]) = as.character(State_FIPS_Loc$abbreviation)}
  
}
names(ALL_Teams) = gsub("./", "",list.dirs()[-1]) 
All_Teams_Weekly<-list()
#load('CDC_groups_results.rda')
# loop through the sub directories (use [-1] to lop off the current directory: ".")
for (dir in list.dirs()[-1]) {
  print(dir)
  temp_folder = dir 
  temp_files = list.files(temp_folder) #all csv files 
  for (temp_f in temp_files){ # loop through all files in that team
    print(temp_f)
    if (grepl(".csv",temp_f)&&as.Date(substr(temp_f,1,10))>=as.Date("2021-09-01")){ # there exists .txt files, ignore those
      teams_reporting <- read.csv(file.path(temp_folder, temp_f)) #get search word (common cold) frequency csv file
      temp_target_week = unique(teams_reporting[,c("target","target_end_date")])
      for (i in 1:14){ # for each i weeks ahead inc death 
        temp_target_workon = paste0(i,paste_target) #current target working on, "xxx week ahead inc death
        if (length(which(teams_reporting$target==temp_target_workon)) >=1){ # Enter only if the target week is reported
          for (loc_idx in 1:dim(State_FIPS_Loc)[1]){ # for each locations reporting
            location_FIPS = as.character(State_FIPS_Loc[loc_idx,"location"]) # get the current location's FIPS code
            if (loc_idx <=9){temp_location_FIPS = gsub("0","",location_FIPS)} else{temp_location_FIPS = 100} # location FIPS code from 1 to 9 might not have leading 0, if not 1 to 9 let it be a impossible value, 100
            if (length(which(teams_reporting$location == location_FIPS)) >=1 ){ #Enter only if locations are reported
              temp_date = as.Date(as.character(temp_target_week[temp_target_week$target==temp_target_workon,"target_end_date"])) #since each xxx weeks ahead inc death associate with unique target end week. Get that target end week
              if (any(teams_reporting$target==temp_target_workon & as.Date(teams_reporting$target_end_date)==temp_date & teams_reporting$type=="point" & teams_reporting$location==location_FIPS)){
                ALL_Teams[[gsub("./", "",temp_folder)]][[temp_target_workon]][temp_date, as.character(State_FIPS_Loc[loc_idx,"abbreviation"]) ] = 
                  teams_reporting[c(teams_reporting$target==temp_target_workon & as.Date(teams_reporting$target_end_date)==temp_date & teams_reporting$type=="point" & teams_reporting$location==location_FIPS),"value"]
              }
            }else if (length(which(teams_reporting$location == temp_location_FIPS)) >=1){
              temp_date = as.Date(as.character(temp_target_week[temp_target_week$target==temp_target_workon,"target_end_date"])) #since each xxx weeks ahead inc death associate with unique target end week. Get that target end week
              if (any(teams_reporting$target==temp_target_workon & as.Date(teams_reporting$target_end_date)==temp_date & teams_reporting$type=="point" & teams_reporting$location==temp_location_FIPS)){
                ALL_Teams[[gsub("./", "",temp_folder)]][[temp_target_workon]][temp_date, as.character(State_FIPS_Loc[loc_idx,"abbreviation"]) ] = 
                  teams_reporting[c(teams_reporting$target==temp_target_workon & as.Date(teams_reporting$target_end_date)==temp_date & teams_reporting$type=="point" & teams_reporting$location==temp_location_FIPS),"value"]
              }
            }
          }
        }
      }
    }
  }
  #AGGREGATE into weekly prediciton
  #one_week_ahead
  curr_pred=ALL_Teams[[gsub("./", "",temp_folder)]][[1]]
  index(curr_pred)<-index(curr_pred)-1
  for(i in 2:7){
    temp_pred=ALL_Teams[[gsub("./", "",temp_folder)]][[i]]
    index(temp_pred)<-index(temp_pred)-i
    curr_pred<-curr_pred+temp_pred
  }
  All_Teams_Weekly[[gsub("./", "",temp_folder)]][['1 weeek ahead']]<-na.omit(curr_pred)
  #two_week_ahead
  curr_pred=ALL_Teams[[gsub("./", "",temp_folder)]][[8]]
  index(curr_pred)<-index(curr_pred)-8
  for(i in 9:14){
    temp_pred=ALL_Teams[[gsub("./", "",temp_folder)]][[i]]
    index(temp_pred)<-index(temp_pred)-i
    curr_pred<-curr_pred+temp_pred
  }
  All_Teams_Weekly[[gsub("./", "",temp_folder)]][['2 weeek ahead']]<-na.omit(curr_pred)
  save(All_Teams_Weekly,ALL_Teams,file="CDC_groups_results_new.rda")
}
if(TRUE){
  All_Teams_Weekly_nat<-list()
  All_Teams_Weekly_state<-list()
  load("CDC_groups_results_new.rda")
  #aggregate national and state results separately. 
  for(curr_team in names(ALL_Teams)){
    curr_pred=ALL_Teams[[curr_team]][[1]]
    curr_pred[is.na(curr_pred)]<-0
    index(curr_pred)<-index(curr_pred)-1
    
    curr_pred_nat<-na.omit(curr_pred[,'US'])
    for(i in 2:7){
      temp_pred=ALL_Teams[[curr_team]][[i]]
      index(temp_pred)<-index(temp_pred)-i
      temp_pred[is.na(temp_pred)]<-0# fill NA with 0
      curr_pred<-curr_pred+temp_pred
      curr_pred_nat<-curr_pred_nat+na.omit(temp_pred[,'US'])
    }
    All_Teams_Weekly_state[[curr_team]][['1 weeek ahead']]<-na.omit(curr_pred)
    All_Teams_Weekly_nat[[curr_team]][['1 weeek ahead']]<-na.omit(curr_pred_nat)
    #two_week_ahead
    curr_pred=ALL_Teams[[curr_team]][[8]]
    curr_pred[is.na(curr_pred)]<-0
    index(curr_pred)<-index(curr_pred)-8
    curr_pred_nat<-na.omit(curr_pred[,'US'])
    for(i in 9:14){
      temp_pred=ALL_Teams[[curr_team]][[i]]
      index(temp_pred)<-index(temp_pred)-i
      temp_pred[is.na(temp_pred)]<-0
      curr_pred<-curr_pred+temp_pred
      curr_pred_nat<-curr_pred_nat+na.omit(temp_pred[,'US'])
    }
    All_Teams_Weekly_state[[curr_team]][['2 weeek ahead']]<-na.omit(curr_pred)
    All_Teams_Weekly_nat[[curr_team]][['2 weeek ahead']]<-na.omit(curr_pred_nat)
    #save(All_Teams_Weekly,ALL_Teams,file="CDC_groups_results.rda")
  }
 
}
#do weekly aggregation before calculating MAE
if(FALSE){
  setwd("D:/Covid-19 Data/New folder/Hosp_Prediction")
  
  load("incident_hosp_cases.rda")
  library(ggplot2)
  
  load("national_prediction_hosp_admission_0117.rda")
  national_prediction<-na.omit(list_results_nat_0901[["2 week ahead"]][[1]])
  
  load("CDC_groups_results_combined.rda")
 
  
  pred_target<-national_prediction[,1]
  all_teams_evaluation<-matrix(NA,nrow = length(names(All_Teams_Weekly_nat))+ncol(national_prediction)-1,ncol = 3)
  colnames(all_teams_evaluation)<-c("RMSE","Cor","MAE")
  rownames(all_teams_evaluation)[(length(names(All_Teams_Weekly_nat))+1):nrow(all_teams_evaluation)]<-colnames(national_prediction)[2:ncol(national_prediction)]
  rownames(all_teams_evaluation)[1:length(names(All_Teams_Weekly_nat))]<-names(All_Teams_Weekly_nat)
  curr_dates<-seq(as.Date("2021-01-04"),as.Date("2021-12-27"),by=7)
  
  all_teams_nat<-pred_target[curr_dates]
  for (curr_team in names(All_Teams_Weekly_nat)){
    
   curr_pred_nat<-All_Teams_Weekly_nat[[curr_team]][[2]]
   curr_pred_nat<-curr_pred_nat[curr_dates]
   curr_pred_nat[curr_pred_nat==0]<-national_prediction[index(curr_pred_nat[curr_pred_nat==0]),'naive']
   curr_pred_nat<-na.omit(curr_pred_nat)
   curr_pred_target<-pred_target[index(curr_pred_nat)]
   all_teams_evaluation[curr_team,1]<- rmse(curr_pred_nat,curr_pred_target)
   all_teams_evaluation[curr_team,2]<- cor(curr_pred_nat,curr_pred_target)
   all_teams_evaluation[curr_team,3]<- mae(curr_pred_nat,curr_pred_target)
   colnames(curr_pred_nat)<-curr_team
   all_teams_nat<-cbind(all_teams_nat,curr_pred_nat)
   
  }
  #all_teams_nat<-na.omit(all_teams_nat)
  for(our_method in colnames(national_prediction)[2:ncol(national_prediction)]){
    our_pred<-national_prediction[curr_dates,our_method]
    
    all_teams_evaluation[our_method,1]<- rmse(all_teams_nat[,1],our_pred)
    all_teams_evaluation[our_method,2]<- cor(all_teams_nat[,1],our_pred)
    all_teams_evaluation[our_method,3]<- mae(all_teams_nat[,1],our_pred)
    all_teams_nat<-cbind(all_teams_nat,our_pred)
  }
  all_teams_evaluation<-all_teams_evaluation[order(all_teams_evaluation[,1]),]
  xtable(all_teams_evaluation)
 # all_teams_nat<-na.omit(all_teams_nat)
  #write.xlsx(all_teams_evaluation,file = "All_teams_0103.xlsx",append = TRUE, sheetName = 'two-week')
  
  visual_data<-all_teams_nat[,c("truth","full_model_lasso","AR7_ARIMA700","naive",
                                        "COVIDhub.baseline","COVIDhub.ensemble")]
  colnames(visual_data)<-c("Truth","ARGO","AR7","Naive",
                           "COVIDhub-baseline","COVIDhub-ensemble")
  
  temp_matrix = matrix(NA,length(visual_data),3)
  temp_matrix[,3] = as.numeric(matrix(visual_data,length(visual_data),1))
  temp_matrix[,1] = rep(as.character(index(visual_data)), dim(visual_data)[2])
  temp_matrix[,2] = rep(colnames(visual_data),each=dim(visual_data)[1])
  temp_matrix = data.frame(temp_matrix)
  temp_matrix[,3] = as.numeric(as.character(temp_matrix[,3]))
  colnames(temp_matrix) = c("Week", "Methods",  "Hospitalizations")
  temp_matrix[,1] = as.Date(temp_matrix[,1])
  # Linechart
  Nat_LineChart = ggplot(data=temp_matrix, aes(x=Week, y=Hospitalizations, group=Methods, color=Methods)) + 
    geom_line(aes(size = Methods,linetype=Methods)) +scale_size_manual(values = c(0.4,.6,.4,.4,.4,.6)  ) +
    scale_color_manual(values=c("gold3","red","green4","blue","cyan","black"))+ 
    scale_linetype_manual(values= c(1,1,1,1,1,1))  + theme_minimal()+
    theme(text = element_text(size=10), axis.text.x = element_text(angle=0, hjust=1))  + 
    theme(legend.position = "bottom",legend.text = element_text(size=10),
          legend.key.width= unit(0.8, 'cm'),legend.title = element_text(size=10))+
  scale_x_date(breaks = '1 month',date_labels =  "%b %Y") 
  png(file="test1.png",width=18,height=15,units="cm",res=500)
  print(plot(Nat_LineChart))
  dev.off()
  
  
  ###############################################################
  #state-level prediction
  load("state_results_hosp_admission_nat_gt_0117_c.rda")
  load("CDC_groups_results_combined.rda")
  #dates<-seq(as.Date("2021-01-01"),as.Date("2021-10-01"),by=1)
  All_Teams_Weekly_state[["Karlen-pypm"]]<-NULL
  All_Teams_Weekly_state[["IHME-CurveFit"]]<-NULL
  All_Teams_Weekly_state[["USC-SI_kJalpha"]]<-NULL
  whole_results_new<-list_results_State_0901[['1 week ahead']]
  evalution_metrics_rmse<-matrix(NA,nrow=length(whole_results_new),
                                 ncol=ncol(whole_results_new[["US-IA"]] )+length(names(All_Teams_Weekly_state))-1)
  colnames(evalution_metrics_rmse)[ncol(whole_results_new[["US-IA"]] ):ncol(evalution_metrics_rmse)]<-names(All_Teams_Weekly_state)
  
  colnames(evalution_metrics_rmse)[1:ncol(whole_results_new[["US-IA"]] )-1]<-colnames(whole_results_new[["US-IA"]] )[2:length(colnames(whole_results_new[["US-IA"]] ))]
   states<-names(whole_results_new)
  
  #states<-states[-32]
  rownames(evalution_metrics_rmse)<-states
  
  evalution_metrics_cor<-evalution_metrics_rmse
  evalution_metrics_MAE<-evalution_metrics_rmse
  evalution_metrics_MAPE<-evalution_metrics_rmse
  state_results<-list()
  for (state in states){
    #full model
    #teams reporting to CDC
    curr_dates<-seq(as.Date("2021-01-04"),as.Date("2021-12-27"),by=7)
    
    curr_results<-na.omit(whole_results_new[[state]] )
    
    curr_all_results<-curr_results[curr_dates,'truth']
    for (curr_method in names(All_Teams_Weekly_state)){
      curr_results_CDC<-All_Teams_Weekly_state[[curr_method]][[1]][curr_dates,substr(state,4,5)]
      curr_results_CDC[curr_results_CDC==0]<-curr_results[index(curr_results_CDC[curr_results_CDC==0]),'naive']
      colnames(curr_results_CDC)<-curr_method
      curr_results_truth<-curr_results[curr_dates,'truth']
      curr_all_results<-cbind(curr_all_results,curr_results_CDC)
      evalution_metrics_rmse[state,curr_method]<-rmse(curr_results_truth,curr_results_CDC)
      evalution_metrics_cor[state,curr_method]<-cor(curr_results_truth,curr_results_CDC)
      evalution_metrics_MAE[state,curr_method]<-mae(curr_results_truth,curr_results_CDC)
      evalution_metrics_MAPE[state,curr_method]<-mape(curr_results_truth,curr_results_CDC)
      
    }

    for (curr_method in colnames(curr_results)[2:length(colnames(curr_results))]) {
      curr_all_results<-cbind(curr_all_results,curr_results[,curr_method])
      evalution_metrics_rmse[state,curr_method]<-rmse(curr_results[,1],curr_results[,curr_method])
      evalution_metrics_cor[state,curr_method]<-cor(curr_results[,1],curr_results[,curr_method])
      evalution_metrics_MAE[state,curr_method]<-mae(curr_results[,1],curr_results[,curr_method])
      evalution_metrics_MAPE[state,curr_method]<-mape(curr_results[,1],curr_results[,curr_method])
    }
    state_results[['1 week ahead']][[state]]<-na.omit(curr_all_results)
    
  }
  # evalution_metrics_rmse[which(is.na(evalution_metrics_rmse))]
  # evalution_metrics_cor<-na.omit(evalution_metrics_cor)
  # evalution_metrics_MAE<-na.omit(evalution_metrics_MAE)
  # evalution_metrics_MAPE<-na.omit(evalution_metrics_MAPE)
  
  final_evaluation<-matrix(NA,nrow = ncol(evalution_metrics_rmse),ncol = 3)
  colnames(final_evaluation)<-c("RMSE","Cor","MAE")#,"MAPE")
  rownames(final_evaluation)<-colnames(evalution_metrics_rmse)
  final_evaluation[,1]<-round(colMeans(evalution_metrics_rmse),digits = 3)
  final_evaluation[,2]<-round(colMeans(evalution_metrics_cor),digits = 3)
  final_evaluation[,3]<-round(colMeans(evalution_metrics_MAE),digits = 3)
 # final_evaluation[,4]<-round(colMeans(evalution_metrics_MAPE),digits = 4)
  #rownames(final_evaluation)<-colnames(evalution_metrics_rmse)
  final_evaluation<-final_evaluation[order(final_evaluation[,1]),]
  
  #write.csv(final_evaluation,file = "state-level_one_week_ahead.csv")
  #violin plot of error metrics
  library(tidyr)
  library(ggplot2)
  library(reshape2)
  rmse<-evalution_metrics_rmse[,c("naive_period","full_model_lasso","AR7_ARIMA700",
                             "COVIDhub-ensemble",
                             "COVIDhub-baseline")]
  colnames(rmse)<-c("naive","ARGO","AR7",
   "COVIDhub-ensemble",
   "COVIDhub-baseline")
  rmse<-melt(rmse)
  colnames(rmse)<-c("state","method","RMSE")
  #MAE
  mae<-evalution_metrics_MAE[,c("naive_period","full_model_lasso","AR7_ARIMA700",
                                   "COVIDhub-ensemble",
                                   "COVIDhub-baseline")]
  colnames(mae)<-c("naive","ARGO","AR7",
                     "COVIDhub-ensemble",
                     "COVIDhub-baseline")
  mae<-melt(mae)
  colnames(mae)<-c("state","method","MAE")
  #COR
  COR<-evalution_metrics_cor[,c("naive_period","full_model_lasso","AR7_ARIMA700",
                                 "COVIDhub-ensemble",
                                 "COVIDhub-baseline")]
  colnames(COR)<-c("naive","ARGO","AR7",
                    "COVIDhub-ensemble",
                    "COVIDhub-baseline")
  COR<-melt(COR)
  colnames(COR)<-c("state","method","Cor")
  data_summary_m <- function(x) {
    m <- mean(x)
    ymin <- max(m-sd(x),0)
    ymax <- m+sd(x)
    return(c(y=m,ymin=ymin,ymax=ymax))
  }
  data_summary_c <- function(x) {
    m <- mean(x)
    ymin <- max(m-sd(x),-1)
    ymax <- min(m+sd(x),1)
    return(c(y=m,ymin=ymin,ymax=ymax))
  }
  library(ggpubr)
  a<-ggplot(rmse, aes(x=method, y=RMSE, fill=method),show.legend = FALSE) +ggtitle("One-week-ahead")+
    geom_violin()+theme(axis.title.x=element_blank(),
                        axis.text.x=element_blank(),
                        axis.ticks.x=element_blank())+
    stat_summary(fun.data=data_summary_m, mult=1, geom="pointrange", color="black", size = 0.2)
  b<-ggplot(mae, aes(x=method, y=MAE, fill=method)) +
    geom_violin()+theme(axis.title.x=element_blank(),
                        axis.text.x=element_blank(),
                        axis.ticks.x=element_blank())+
    stat_summary(fun.data=data_summary_m, mult=1, geom="pointrange", color="black", size = 0.2)
  c<-ggplot(COR, aes(x=method, y=Cor, fill=method)) +
    geom_violin()+theme(axis.title.x=element_blank(),
                        axis.text.x=element_blank(),
                        axis.ticks.x=element_blank())+
    stat_summary(fun.data=data_summary_c, mult=1, geom="pointrange", color="black", size = 0.2)
 
  
  ######################
  #two-weeks-ahead
 # load("state_results_final.rda")
  #dates<-seq(as.Date("2021-01-01"),as.Date("2021-10-01"),by=1)

  whole_results_new<-list_results_State_0901[['2 week ahead']]
  evalution_metrics_rmse<-matrix(NA,nrow=length(whole_results_new),
                                 ncol=ncol(whole_results_new[["US-IA"]] )+length(names(All_Teams_Weekly_state))-1)
  colnames(evalution_metrics_rmse)[ncol(whole_results_new[["US-IA"]] ):ncol(evalution_metrics_rmse)]<-names(All_Teams_Weekly_state)
  
  colnames(evalution_metrics_rmse)[1:ncol(whole_results_new[["US-IA"]] )-1]<-colnames(whole_results_new[["US-IA"]] )[2:length(colnames(whole_results_new[["US-IA"]] ))]
  states<-names(whole_results_new)
  
  #states<-states[-32]
  rownames(evalution_metrics_rmse)<-states
  
  evalution_metrics_cor<-evalution_metrics_rmse
  evalution_metrics_MAE<-evalution_metrics_rmse
  evalution_metrics_MAPE<-evalution_metrics_rmse
  for (state in states){
    #full model
    #teams reporting to CDC
    curr_dates<-seq(as.Date("2021-01-04"),as.Date("2021-12-27"),by=7)
    
    curr_results<-na.omit(whole_results_new[[state]] )
   
    curr_all_results<-curr_results[curr_dates,'truth']
    for (curr_method in names(All_Teams_Weekly_state)){
      curr_results_CDC<-All_Teams_Weekly_state[[curr_method]][[2]][curr_dates,substr(state,4,5)]
      curr_results_CDC[curr_results_CDC==0]<-curr_results[index(curr_results_CDC[curr_results_CDC==0]),'naive']#fill missing values with niave
      colnames(curr_results_CDC)<-curr_method
      curr_results_truth<-curr_results[curr_dates,'truth']
      curr_all_results<-cbind(curr_all_results,curr_results_CDC)
      evalution_metrics_rmse[state,curr_method]<-rmse(curr_results_truth,curr_results_CDC)
      evalution_metrics_cor[state,curr_method]<-cor(curr_results_truth,curr_results_CDC)
      evalution_metrics_MAE[state,curr_method]<-mae(curr_results_truth,curr_results_CDC)
      evalution_metrics_MAPE[state,curr_method]<-mape(curr_results_truth,curr_results_CDC)
      
    }
    
    for (curr_method in colnames(curr_results)[2:length(colnames(curr_results))]) {
      curr_all_results<-cbind(curr_all_results,curr_results[,curr_method])
      evalution_metrics_rmse[state,curr_method]<-rmse(curr_results[,1],curr_results[,curr_method])
      evalution_metrics_cor[state,curr_method]<-cor(curr_results[,1],curr_results[,curr_method])
      evalution_metrics_MAE[state,curr_method]<-mae(curr_results[,1],curr_results[,curr_method])
      evalution_metrics_MAPE[state,curr_method]<-mape(curr_results[,1],curr_results[,curr_method])
    }
    state_results[['2 week ahead']][[state]]<-na.omit(curr_all_results)
    
  }
  # evalution_metrics_rmse[which(is.na(evalution_metrics_rmse))]
  # evalution_metrics_cor<-na.omit(evalution_metrics_cor)
  # evalution_metrics_MAE<-na.omit(evalution_metrics_MAE)
  # evalution_metrics_MAPE<-na.omit(evalution_metrics_MAPE)
  save(state_results,file = 'all_results_state.rda')
  final_evaluation<-matrix(NA,nrow = ncol(evalution_metrics_rmse),ncol = 3)
  colnames(final_evaluation)<-c("RMSE","Cor","MAE")#,"MAPE")
  rownames(final_evaluation)<-colnames(evalution_metrics_rmse)
  final_evaluation[,1]<-round(colMeans(evalution_metrics_rmse),digits = 3)
  final_evaluation[,2]<-round(colMeans(evalution_metrics_cor),digits = 3)
  final_evaluation[,3]<-round(colMeans(evalution_metrics_MAE),digits = 3)
  # final_evaluation[,4]<-round(colMeans(evalution_metrics_MAPE),digits = 4)
  #rownames(final_evaluation)<-colnames(evalution_metrics_rmse)
  final_evaluation<-final_evaluation[order(final_evaluation[,1]),]
  #write.csv(final_evaluation,file = "state-level_two_week_ahead.csv")
  
  rmse<-evalution_metrics_rmse[,c("naive_period","full_model_lasso","AR7_ARIMA700",
                                   "COVIDhub-ensemble",
                                   "COVIDhub-baseline")]
  colnames(rmse)<-c("naive","ARGO","AR7",
                     "COVIDhub-ensemble",
                     "COVIDhub-baseline")
  rmse<-melt(rmse)
  colnames(rmse)<-c("state","method","RMSE")
  #MAE
  mae<-evalution_metrics_MAE[,c("naive_period","full_model_lasso","AR7_ARIMA700",
                                 "COVIDhub-ensemble",
                                 "COVIDhub-baseline")]
  colnames(mae)<-c("naive","ARGO","AR7",
                    "COVIDhub-ensemble",
                    "COVIDhub-baseline")
  mae<-melt(mae)
  colnames(mae)<-c("state","method","MAE")
  #COR
  COR<-evalution_metrics_cor[,c("naive_period","full_model_lasso","AR7_ARIMA700",
                                 "COVIDhub-ensemble",
                                 "COVIDhub-baseline")]
  colnames(COR)<-c("naive","ARGO","AR7",
                    "COVIDhub-ensemble",
                    "COVIDhub-baseline")
  COR<-melt(COR)
  colnames(COR)<-c("state","method","Cor")
  d<-ggplot(rmse, aes(x=method, y=RMSE, fill=method),show.legend = FALSE) +ggtitle("Two-week-ahead")+
    geom_violin()+theme(axis.title.x=element_blank(),
                        axis.text.x=element_blank(),
                        axis.ticks.x=element_blank(),
                        axis.title.y=element_blank(),
                        axis.text.y=element_blank(),
                        axis.ticks.y=element_blank())+
    stat_summary(fun.data=data_summary_m, mult=1, geom="pointrange", color="black", size = 0.2)
  e<-ggplot(mae, aes(x=method, y=MAE, fill=method)) +
    geom_violin()+theme(axis.title.x=element_blank(),
                        axis.text.x=element_blank(),
                        axis.ticks.x=element_blank(),
                        axis.title.y=element_blank(),
                        axis.text.y=element_blank(),
                        axis.ticks.y=element_blank())+
    stat_summary(fun.data=data_summary_m, mult=1, geom="pointrange", color="black", size = 0.2)
  f<-ggplot(COR, aes(x=method, y=Cor, fill=method)) +
    geom_violin()+theme(axis.title.x=element_blank(),
                        axis.text.x=element_blank(),
                        axis.ticks.x=element_blank(),
                        axis.title.y=element_blank(),
                        axis.text.y=element_blank(),
                        axis.ticks.y=element_blank())+
    stat_summary(fun.data=data_summary_c,  geom="pointrange", color="black", size = 0.2)
  state_dist<-ggarrange(a,d,b,e,c,f, 
            ncol = 2, nrow = 3,common.legend = TRUE, legend="bottom")
  
  png(file="test1.png",width=18,height=15,units="cm",res=500)
  print(plot(state_dist))
  dev.off()
  # png(file="test1.png",width=20,height=12,units="cm",res=500)
  # print(state_dist)
  # dev.off()
  #ggsave(plot=state_dist,filename="state_dist.png",path = "D:/Covid-19 Data/New folder/Hosp_Prediction", device='tiff', dpi=100,height = 12,width = 20)
}
#calculate daily MAE and then do weekly aggregation
if(FALSE){
  
  pred_week<-8:14
  pred_start<-as.Date("2021-01-04")
  pred_end<-as.Date("2021-09-27")
  dates<-as.Date(seq(pred_start,pred_end,7))
  setwd("D:/Covid-19 Data/New folder/Hosp_Prediction")
  #load("national_prediction_hosp_admission.rda")
  load("CDC_groups_results.rda")
  ALL_Teams[["Karlen-pypm"]]<-NULL
  ALL_Teams[["IHME-CurveFit"]]<-NULL
  ALL_Teams[["USC-SI_kJalpha"]]<-NULL
  nat_pred<-list_results_step1.State[['US']]

  all_teams_evaluation<-matrix(NA,nrow = length(names(ALL_Teams))+ncol(nat_pred[[1]])-1,ncol = 1)
  colnames(all_teams_evaluation)<-c("MAE")
  rownames(all_teams_evaluation)<-c(colnames(nat_pred[[1]])[2:14],names(ALL_Teams))
  
  ALL_Teams_AE<-NA
  for(curr_team in names(ALL_Teams)){
    curr_pred_team<-ALL_Teams[[curr_team]]
    curr_AE<-0
    for (i in pred_week){
      curr_pred_day<-curr_pred_team[[i]][,'US']
      index(curr_pred_day)<-index(curr_pred_day)-i#store back to forecast date
      truth_day<-nat_pred[[i]][,'truth']
      curr_AE<-curr_AE+abs(truth_day-curr_pred_day)
      
    }
    ALL_Teams_AE<-cbind(ALL_Teams_AE,curr_AE)
  }
  ALL_Teams_AE<-na.omit(ALL_Teams_AE[,-1])
  colnames(ALL_Teams_AE)<-names(ALL_Teams)
  curr_AE_m<-0
    for(i in pred_week){
      curr_pred<-nat_pred[[i]]
      curr_pred<-curr_pred[index(ALL_Teams_AE)]
      curr_AE<-curr_pred[,2:ncol(curr_pred)]
      for(j in 1:ncol(curr_AE)){
        curr_AE[,j]<-abs(curr_AE[,j]-curr_pred[,1])
      }
      curr_AE_m<-curr_AE_m+curr_AE
    }
  ALL_Teams_AE<-cbind(ALL_Teams_AE,curr_AE_m)
  all_teams_evaluation[,1]<-colMeans(ALL_Teams_AE)
  rownames(all_teams_evaluation)<-colnames(ALL_Teams_AE)
  all_teams_evaluation<-as.matrix(all_teams_evaluation[order(all_teams_evaluation),1])
  colnames(all_teams_evaluation)<-'MAE(daily basis then aggregated)'
}

#Three/four weeks ahead
if(FALSE){
  
  pred_week<-15:21
  pred_start<-as.Date("2021-01-04")
  pred_end<-as.Date("2021-09-27")
  dates<-as.Date(seq(pred_start,pred_end,7))
  setwd("D:/Covid-19 Data/New folder/Hosp_Prediction")
  #load("national_prediction_hosp_admission.rda")
 
  nat_pred<-list_results_step1.State[['US']]
  
  all_teams_evaluation<-matrix(NA,nrow = ncol(nat_pred[[1]])-1,ncol = 1)
  colnames(all_teams_evaluation)<-c("MAE")
  rownames(all_teams_evaluation)<-c(colnames(nat_pred[[1]])[2:ncol(nat_pred[[1]])])
  
  curr_AE_m<-0
  for(i in pred_week){
    curr_pred<-nat_pred[[i]]
    curr_pred<-curr_pred[dates]
    curr_AE<-curr_pred[,2:ncol(curr_pred)]
    for(j in 1:ncol(curr_AE)){
      curr_AE[,j]<-abs(curr_AE[,j]-curr_pred[,1])
    }
    curr_AE_m<-curr_AE_m+curr_AE
  }
  
  all_teams_evaluation[,1]<-colMeans(curr_AE_m)
  rownames(all_teams_evaluation)<-colnames(curr_AE_m)
  all_teams_evaluation<-as.matrix(all_teams_evaluation[order(all_teams_evaluation),1])
  colnames(all_teams_evaluation)<-'MAE(daily basis then aggregated)'
}
write.csv(all_teams_evaluation,file = "three-weeks-ahead eva.csv")

